
from typing import List
from pydantic import BaseModel, Field



from llama_index.core import SimpleDirectoryReader
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core import  GPTVectorStoreIndex,VectorStoreIndex
from llama_index.llms import openai_like
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.huggingface import HuggingFaceEmbedding  # HuggingFaceEmbedding: converts text into embedding vectors
from llama_index.llms.huggingface import HuggingFaceLLM  # HuggingFaceLLM: runs Hugging Face pretrained language models
from llama_index.core import Settings,SimpleDirectoryReader,VectorStoreIndex
import chromadb
from llama_index.embeddings.dashscope import DashScopeEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.llms.deepseek  import DeepSeek
from llama_index.embeddings.fastembed import FastEmbedEmbedding

from llama_index.core import QueryBundle

# import NodeWithScore
from llama_index.core.schema import NodeWithScore

# Retrievers
from llama_index.core.retrievers import (
    BaseRetriever,
    VectorIndexRetriever,
    KeywordTableSimpleRetriever,
)
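# Connect to the Chroma database
# NOTE(review): no Chroma client is created in the code visible below — this comment looks stale; confirm.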


import os

# Configure DeepSeek as the default LLM for every llama-index component.
# The API key is read from the DEEPSEEK_API_KEY environment variable instead of
# being embedded in the file; a missing variable raises KeyError immediately so
# the misconfiguration is caught at startup.
# NOTE: the key that used to be hard-coded here should be treated as leaked
# and rotated.
llm = DeepSeek(model="deepseek-chat", api_key=os.environ["DEEPSEEK_API_KEY"])
Settings.llm = llm
 
from zhipuai import ZhipuAI
from llama_index.embeddings.zhipuai import ZhipuAIEmbedding

import os

# Configure the ZhipuAI embedding model as the default embedder for
# llama-index. The API key is read from the ZHIPUAI_API_KEY environment
# variable instead of being embedded in the file; a missing variable raises
# KeyError immediately so the misconfiguration is caught at startup.
# NOTE: the key that used to be hard-coded here should be treated as leaked
# and rotated.
embeddings = ZhipuAIEmbedding(
    model="embedding-2",
    api_key=os.environ["ZHIPUAI_API_KEY"],
    # With the `embedding-3` class
    # of models, you can specify the size
    # of the embeddings you want returned.
    # dimensions=1024
)
Settings.embed_model = embeddings
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.postprocessor import StructuredLLMRerank

from llama_index.llms.openai import OpenAI
from IPython.display import Markdown, display

# Load the Lyft 2021 10-K PDF through SimpleDirectoryReader; the result is kept
# in `documents` (only the disabled demo further down refers to it).
documents = SimpleDirectoryReader(input_files=["./data/10k/lyft_2021.pdf"]).load_data()

from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core import QueryBundle
import pandas as pd
from IPython.display import display, HTML
from copy import deepcopy




# NOTE: everything between the triple quotes below is a bare string literal,
# so none of it is executed. It holds a disabled retrieval/rerank demo that
# would build a VectorStoreIndex from `documents`, retrieve the top-k nodes for
# a query (optionally reranked with StructuredLLMRerank) and render the scores
# and texts as an HTML table. Remove the surrounding quotes to re-enable it.
'''
index = VectorStoreIndex.from_documents(
    documents,
)

def get_retrieved_nodes(
    query_str, vector_top_k=10, reranker_top_n=3, with_reranker=False
):
    query_bundle = QueryBundle(query_str)
    # configure retriever
    retriever = VectorIndexRetriever(
        index=index,
        similarity_top_k=vector_top_k,
    )
    retrieved_nodes = retriever.retrieve(query_bundle)

    if with_reranker:
        # configure reranker
        reranker = StructuredLLMRerank(
            choice_batch_size=5,
            top_n=reranker_top_n,
        )
        retrieved_nodes = reranker.postprocess_nodes(
            retrieved_nodes, query_bundle
        )

    return retrieved_nodes


def pretty_print(df):
    return display(HTML(df.to_html().replace("\\n", "<br>")))


def visualize_retrieved_nodes(nodes) -> None:
    result_dicts = []
    for node in nodes:
        node = deepcopy(node)
        node.node.metadata = {}
        node_text = node.node.get_text()
        node_text = node_text.replace("\n", " ")

        result_dict = {"Score": node.score, "Text": node_text}
        result_dicts.append(result_dict)

    pretty_print(pd.DataFrame(result_dicts))

new_nodes = get_retrieved_nodes(
    "What is Lyft's response to COVID-19?", vector_top_k=5, with_reranker=False
)
visualize_retrieved_nodes(new_nodes)

'''